library(biomaRt)
biomaRt::listEnsemblArchives()
listMarts()
mart_aug2020 <- biomaRt::useMart(biomart = "ENSEMBL_MART_ENSEMBL",
dataset = "hsapiens_gene_ensembl",
host = "http://aug2020.archive.ensembl.org")
head(listAttributes(mart_aug2020), n = 100)
grep( "entrez", listAttributes(mart_aug2020)$name,value = T)
t2g_aug2020 <- biomaRt::getBM(attributes = c("entrezgene_id","external_gene_name", "ensembl_gene_id", "ensembl_gene_id_version", "gene_biotype", "transcript_biotype","chromosome_name", "band", "transcript_length", "start_position", "end_position", "strand"), mart = mart_aug2020)
t2g_aug2020 <- dplyr::rename(t2g_aug2020, entrez_gene = entrezgene_id, ext_gene = external_gene_name, ens_gene = ensembl_gene_id, ens_gene_ver = ensembl_gene_id_version)
save(list = c("t2g_aug2020", "mart_aug2020"), file = "/researchers/antonio.ahn/general/R/Download_resources/RData/mart_aug2020.RData")
load("/researchers/antonio.ahn/general/R/Download_resources/RData/mart_aug2020.RData")
# apr2019 is the last version to not have converged the lncRNA biotypes into just one group
# Starting from July2019, lncRNA biotypes are converged to a single "lncRNA" category.... why??
load("/researchers/antonio.ahn/general/R/Download_resources/RData/mart_apr2019.RData")
fcounts <- read_tsv(file = "/researchers/krutika.ambani/Goel_lab_members/Keefe_Chan/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY/results/bamfiles/featurecounts_stranded_2/featurecounts_stranded_2.txt", skip = 1)
# 60656 genes (rows) and 48 samples (columns)
dim(fcounts_mat)
# 15111 genes with zero counts in all 12 samples
table(rowSums(fcounts_mat == 0) == 48)
# Here i chose to keep genes that have higher or equal to count than 5 in at least 3 or more samples (there are 3 samples in my smallest comparison group)
index <- rowSums(fcounts_mat > 5) >= 3
#index
#FALSE TRUE
#38707 21949
table(index)
# This reduces the number of genes
# filtered matrix = fmat
fcounts_Fmat <- fcounts_mat[index,]
# no duplications in the ENSG ID names
rownames(fcounts_Fmat) %>% duplicated %>% table
rownames(fcounts_mat) %>% duplicated %>% table
write_tsv(fcounts_Fmat %>% as_tibble(
rownames = "ensembl_gene_id"), "/researchers/krutika.ambani/Goel_lab_members/Keefe_Chan/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY/R_analysis/220714_intiial_QC_AA/1.QC_exploratory/data/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY_fcounts_Fmat.txt")
## converting counts to integer mode
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates
## mean-dispersion relationship
## final dispersion estimates
## fitting model and testing
## -- replacing outliers and refitting for 8 genes
## -- DESeq argument 'minReplicatesForReplace' = 7
## -- original counts are preserved in counts(dds)
## estimating dispersions
## fitting model and testing
dds <- DESeqDataSetFromMatrix(fcounts_Fmat, colData = sample_information, design = ~ treatment)
# adding rowData. This is not required... dunno why i did this previously
table(rownames(dds) %in% t2g_aug2020$ens_gene_ver)
rowData(dds) <- data.frame(ens_gene_ver = rownames(dds), SYMBOL = t2g_aug2020$ext_gene[match(rownames(dds), t2g_aug2020$ens_gene_ver)])
rld <- rlog(dds) %>% assay
save(list = c("dds", "rld"), file = "/researchers/krutika.ambani/Goel_lab_members/Keefe_Chan/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY/R_analysis/220714_intiial_QC_AA/1.QC_exploratory/data/rld.RData")
load("/researchers/krutika.ambani/Goel_lab_members/Keefe_Chan/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY/R_analysis/220714_intiial_QC_AA/1.QC_exploratory/data/rld.RData")
# adding in the gene symbols instead of ENSG IDs
table(rownames(rld) %in% t2g_aug2020$ens_gene_ver)
##
## TRUE
## 21949
rownames(rld) <- t2g_aug2020$ext_gene[match(rownames(rld), t2g_aug2020$ens_gene_ver)]
plot_grid(a1,a2)
plot_grid(a1,a2)
plot_grid(a1,a2)
plot_grid(a1,a2)
Preparing the data frame
# the top 50 genes that drives PC1. An arbritrary threshold needs to be applied to get the top genes.
pca_results$rotation[,"PC1"] %>% sort(decreasing=TRUE) %>% head(n=100)
## SMC2 POC1A TNFAIP8L1 SAPCD2 CENPI FANCD2 GRB14
## 0.03750883 0.03748773 0.03743238 0.03739268 0.03728621 0.03725676 0.03718276
## ERCC6L SKP2 GAS2L3 PSRC1 SMC4 CDCA8 ZWINT
## 0.03713609 0.03713383 0.03711240 0.03709288 0.03702653 0.03701940 0.03698839
## BUB1B LIG1 DTYMK NSD2 KIF15 H2AX BUB1
## 0.03696752 0.03696719 0.03694847 0.03694510 0.03692765 0.03692608 0.03691568
## CENPE GPSM2 TACC3 RECQL4 PLK4 SPAG5 ASPM
## 0.03691253 0.03689612 0.03689238 0.03688875 0.03687482 0.03686878 0.03686270
## NMU KIF20B CDC20 EME1 SYCE2 KIF14 MAD2L1
## 0.03686112 0.03683519 0.03682917 0.03682406 0.03681882 0.03681704 0.03679000
## PRR11 SGO1 ARHGAP11A DSCC1 ZNF732 CBX5 DHFR
## 0.03677434 0.03675758 0.03673881 0.03670795 0.03670726 0.03670398 0.03667945
## CENPW ACBD7 FANCB CIP2A FANCG SHCBP1 OIP5
## 0.03666311 0.03666023 0.03664010 0.03662291 0.03661729 0.03660820 0.03656629
## TPX2 GINS4 BLM ANLN CCNB2 SPC24 CENPN
## 0.03656209 0.03655261 0.03653267 0.03653258 0.03652636 0.03651757 0.03651187
## PIF1 GTSE1 PBK CDC25C KIF11 H2AC20 MXD3
## 0.03649454 0.03648110 0.03647684 0.03647548 0.03646893 0.03646545 0.03645719
## DLX3 RAD54L PIMREG DLGAP5 YBX2 ITGAE PLK1
## 0.03645201 0.03644690 0.03643359 0.03642469 0.03641981 0.03640065 0.03640036
## RADIL CENPF NEIL3 AC068831.6 KIFC1 TRIP13 RAD51
## 0.03639542 0.03636182 0.03635094 0.03633139 0.03631553 0.03631423 0.03630197
## KNL1 DMBX1 METTL7A CDCA7L DNA2 KIF4A RACGAP1
## 0.03629742 0.03628134 0.03626367 0.03626030 0.03624971 0.03622757 0.03622683
## DEPDC1 TUBA1B KHK KIF23 KNSTRN DPF1 BRCA2
## 0.03622533 0.03622332 0.03620633 0.03619628 0.03615721 0.03613347 0.03612643
## ATAD2 CKS1B BIRC5 CDKN2C TK1 SKA3 PSIP1
## 0.03611662 0.03611167 0.03608710 0.03608033 0.03607579 0.03606540 0.03605708
## CDCA3 SPC25
## 0.03605316 0.03605126
pca_results$rotation[,"PC2"] %>% sort(decreasing=TRUE) %>% head(n=100)
## COL4A5 PCDHGB7 FRAS1 SDC2 THEG MAOB PGR
## 0.09612437 0.09580860 0.09290137 0.09243446 0.08892456 0.08840027 0.08697228
## SYT13 MGP SNCG PCDHA7 OXTR PCDHGA2 LYPD1
## 0.08696684 0.08552496 0.08465830 0.08399895 0.08346819 0.08334863 0.08123307
## COL4A6 EPHA7 PCDHB3 CPQ PCDHB2 VIM AREG
## 0.08084250 0.08054588 0.07982182 0.07968364 0.07968265 0.07822765 0.07798733
## PCDHA6 SPOCK1 SLCO2A1 RAB7B MOXD1 WDR90 NELL2
## 0.07724491 0.07565791 0.07534341 0.07493638 0.07460475 0.07369086 0.07168905
## KLK5 STC1 ARHGAP29 ST8SIA1 S100A2 CLDN9 ZNF365
## 0.07119041 0.07116868 0.06969321 0.06956936 0.06878089 0.06861891 0.06855880
## PLEKHA4 GPR37 LGI2 ENC1 SLC38A11 KRTAP3-1 KCNJ8
## 0.06812500 0.06730398 0.06582181 0.06580688 0.06576954 0.06496009 0.06478332
## ISX ADAM19 CALCR AMIGO2 KCNK2 ST6GALNAC4 LHX1
## 0.06464701 0.06405167 0.06392250 0.06359578 0.06343412 0.06330482 0.06268644
## AASS SERPINE1 KLK6 NPY1R STUM GSTM1 VSTM2L
## 0.06231461 0.06116022 0.05997918 0.05924415 0.05887327 0.05841534 0.05838649
## FAM131B TNC CCN2 TMPRSS6 GASK1B CADPS2 PCDHB13
## 0.05815400 0.05798170 0.05777493 0.05719736 0.05639570 0.05606980 0.05598102
## UBE2QL1 CD22 SERPINA1 MEF2C MAB21L4 PGM2L1 KRT16
## 0.05475407 0.05395560 0.05388752 0.05369156 0.05350433 0.05341584 0.05289013
## CDH26 SPRR3 FBN2 CLCF1 FAM83E NECTIN3 ADAMTS9
## 0.05277602 0.05225556 0.05170139 0.05152021 0.05151048 0.05142750 0.05135598
## DUSP2 BMP7 MACC1 CNTNAP2 PHLDB2 IL18 ANK2
## 0.05113602 0.05084087 0.05060155 0.05049239 0.05043758 0.05036053 0.05016012
## STAT6 MAP1B AKR1B10 AKAP12 FMN2 RNF217 TIMP2
## 0.04992152 0.04978040 0.04970297 0.04951616 0.04906101 0.04871050 0.04865692
## ASB9 TP63 LRRTM3 COL17A1 CDSN SERPINA5 DNER
## 0.04865247 0.04861034 0.04798018 0.04790406 0.04770761 0.04759981 0.04721390
## KRT20 WLS
## 0.04707634 0.04679574
##
## Attaching package: 'PCAtools'
## The following objects are masked from 'package:stats':
##
## biplot, screeplot
## -- removing the lower 10% of variables based on variance
## Warning: Removed 2 row(s) containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
ggplot_gene_all("KRT20")
ggplot_gene_all("KRT4")
ggplot_gene_all("EMP1")
ggplot_gene_all("UGT1A6")
ggplot_gene_all("ABCA12")
ggplot_gene_all("DIO2")
ggplot_gene_all("ASB9")
ggplot_gene_all("IL6")
ggplot_gene_all("MMP1")
ggplot_gene_all("IL32")
ggplot_gene_all("PLAUR")
PC3_100genes <- pca_results$rotation[,"PC3"] %>% sort(decreasing=TRUE) %>% head(n=100) %>% names
heatmap_mat <- norm_mat[PC3_100genes,]
# generate z-score
scaled.mat <- t(scale(t(heatmap_mat)))
### annotate column
columnfactor <- sample_information %>%
unite(col = "group" , c(knockout,clone,treatment), sep = "_") %>% .$group
annotation_column <- data.frame(row.names = colnames(scaled.mat), group = columnfactor)
# 8 groups to color
annotation_column$group %>% unique %>% length
## [1] 16
### annotate rows. no need here
#table(rownames(scaled.mat) == names(cut_output))
#annotation_row <- data.frame(row.names = names(rownames_cluster_new), cluster = #factor(paste0("c",rownames_cluster_new), levels = c(paste0("c",1:20))))
### annotate colors
# annotate colors: column
#startrek_col <- ggsci::pal_startrek()(7)[1:4]
#names(startrek_col) <- c("DMSO","DOX","LY","RAD")
#paired_color <- brewer.pal(8,"Paired")
#names(paired_color) <- unique(annotation_column$group)
#npg_col <- pal_npg()(8)[3:4]
#names(npg_col) <- c("sig", "nonsig")
# annotate colors: rows
# row_colors <- c(brewer.pal(12,"Paired")[9:10], brewer.pal(8,"Dark2"))
column_colors <- c(brewer.pal(8,"Dark2"), ggsci::pal_simpsons()(8))
names(column_colors) <- unique(annotation_column$group)
#row_colors <- colorRampPalette(c("blue", "purple","green", "white", "orange","red"))(20)
#names(row_colors) <- paste0("c",1:20)
#row_colors <- pals::stepped()[1:20]
#names(row_colors) <- paste0("c",1:20)
# annotate colors: put all together
anno_colors <- list(group = column_colors)
### other parameters
scaled.mat %>% max
## [1] 3.277022
scaled.mat %>% min
## [1] -3.158797
breaksList <- seq(-3, 3, length.out = 100)
# col.pan <- colorRampPalette(c("purple","black", "yellow"))(100)
col.pan <- colorRampPalette(c("black","dodgerblue3", "white", "orange","red"))(100)
pheatmap::pheatmap(scaled.mat, col= col.pan, cluster_rows = F, cluster_cols = F, show_rownames = T, show_colnames=T, annotation_col = annotation_column, border_color = FALSE, legend = T,fontsize_row = 4,annotation_colors = anno_colors, treeheight_col = 20, breaks = breaksList, clustering_distance_rows = "euclidean",clustering_method = "complete")
library(msigdbr)
library(clusterProfiler)
h_gene_sets = msigdbr(species = "Homo sapiens", category = "H")
c2_gene_sets = msigdbr(species = "Homo sapiens", category = "C2")
c2_gene_sets <- c2_gene_sets %>% filter(grepl("REACTOME|KEGG", c2_gene_sets$gs_name))
c5_gene_sets = msigdbr(species = "Homo sapiens", category = "C5")
c5_gene_sets <- c5_gene_sets %>% filter(!grepl("HP_", c5_gene_sets$gs_name))
MSigDB.ofinterest <- rbind(h_gene_sets, c2_gene_sets, c5_gene_sets)
MSigDB.ofinterest <- MSigDB.ofinterest %>% dplyr::select(gs_name, gene_symbol)
PC3_hypergeo_output <- enricher(PC3_100genes, TERM2GENE = MSigDB.ofinterest)
PC3_hypergeo_output %>% as_tibble %>% DT::datatable()
pca_results_lncRNA <- prcomp(t(rld_lncRNA[select,]), center =TRUE, scale = TRUE)
# the top 50 genes that drives PC1. An arbritrary threshold needs to be applied to get the top genes.
pca_results_lncRNA$rotation[,"PC1"] %>% sort(decreasing=TRUE) %>% head(n=100)
## SNHG19 NAPA-AS1 SNHG3 AL441992.1 LINC00997 PXN-AS1
## 0.05510266 0.05485047 0.05484211 0.05462298 0.05425790 0.05409434
## PITPNA-AS1 AL513165.1 AC103746.1 LINC01963 AC137932.1 KIFC1
## 0.05400740 0.05382320 0.05376251 0.05350812 0.05341298 0.05317138
## AC092718.4 AC010642.2 AP002360.1 AC009005.1 MINCR AC091178.2
## 0.05314825 0.05302811 0.05288654 0.05283861 0.05280396 0.05278254
## LINC02762 DANCR MIR924HG AL359399.1 PCCA-DT AC012470.1
## 0.05268556 0.05243747 0.05231806 0.05226991 0.05192394 0.05158491
## AL138966.2 ATP2A1-AS1 LINC01315 AP001505.1 AL356481.3 AC018645.3
## 0.05155656 0.05150577 0.05149731 0.05147394 0.05143898 0.05143008
## AP002381.2 AC011603.3 AC125807.2 EMSLR LINC02506 AC004540.2
## 0.05141059 0.05124142 0.05118889 0.05108663 0.05089685 0.05088656
## AL133215.2 AC091057.1 AC069148.1 AL132857.1 AC020765.2 AC004540.1
## 0.05086646 0.05064589 0.05044579 0.05032377 0.05005581 0.04984836
## AC002310.2 ADAMTS19-AS1 AC006026.3 AC008555.4 AL158212.3 AL136419.3
## 0.04969167 0.04960804 0.04949045 0.04947325 0.04941449 0.04928650
## PRKG1-AS1 MIR600HG SDK1-AS1 AC026401.3 AL589843.1 KCTD21-AS1
## 0.04906646 0.04894267 0.04889751 0.04851915 0.04832020 0.04822915
## SNHG6 AC007106.1 LINC01135 AC012146.1 GLIDR AC005746.2
## 0.04818828 0.04817471 0.04811825 0.04805854 0.04793408 0.04786549
## SUCLG2-AS1 AC005746.3 FIRRE ELOVL2-AS1 SNHG10 AC091563.1
## 0.04771022 0.04764743 0.04760337 0.04714158 0.04680526 0.04670420
## DGUOK-AS1 ZNF710-AS1 ARRDC1-AS1 AC012615.1 AC136297.1 AC105046.1
## 0.04644262 0.04644191 0.04574321 0.04570545 0.04557971 0.04443829
## TMPO-AS1 AC127070.1 AL031663.3 HAS2-AS1 CCDC84-DT LINC01016
## 0.04437070 0.04420862 0.04401237 0.04223360 0.04161276 0.04156050
## AC061992.2 U47924.2 SNHG21 AC108479.1 KDM4A-AS1 AL390294.1
## 0.04145478 0.04100776 0.04084811 0.04076751 0.03826313 0.03774296
## MIR9-3HG AC074135.1 MELTF-AS1 DDX11-AS1 TYMSOS AC083967.1
## 0.03735406 0.03556548 0.03506964 0.03410505 0.03407073 0.03291299
## AC138904.1 AL606500.1 AC132192.2 AL118516.1 AC010641.2 AC021134.1
## 0.03244705 0.03236186 0.03179028 0.03093279 0.03089415 0.02943644
## AC105383.1 LINC00683 LHX1-DT AC104663.1
## 0.02860864 0.02758960 0.02345337 0.01112784
pca_results_lncRNA$rotation[,"PC2"] %>% sort(decreasing=TRUE) %>% head(n=100)
## AC025048.4 AC015912.3 RN7SL832P AC008610.1 AC005520.5 FOXD2-AS1
## 0.12494027 0.11973172 0.11972264 0.11707063 0.11445560 0.11064686
## AL122058.1 AL118516.1 EML2-AS1 AL807742.1 AC012640.1 AC011498.1
## 0.10993891 0.10911781 0.10767850 0.10304868 0.10261978 0.10195122
## AC005363.2 AL357556.4 AC100791.1 LINC00052 AL021155.4 AC132192.2
## 0.10125004 0.10033364 0.09746521 0.09737603 0.09736656 0.09717165
## AL121944.2 AC123768.1 AC022506.2 FALEC AC100861.2 AC006460.1
## 0.09687361 0.09672976 0.09595125 0.09513574 0.09285605 0.09147663
## AC010624.5 AC007365.1 AL606500.1 FLJ31104 LINC02235 AC111149.2
## 0.08937272 0.08882696 0.08798254 0.08630194 0.08209807 0.08026034
## AC005005.4 AP001160.1 AC010978.1 AC025279.1 AL121936.1 AL109840.2
## 0.08025740 0.08022511 0.07941137 0.07931164 0.07815328 0.07735621
## DDX11-AS1 SNHG21 LINC02605 AC007389.2 CCDC84-DT AC116025.2
## 0.07728164 0.07685225 0.07684033 0.07566794 0.07498532 0.07495090
## LINC02233 TMPO-AS1 KDM4A-AS1 MELTF-AS1 AC010327.7 LINC01345
## 0.07478607 0.07400290 0.07359528 0.07350641 0.07329432 0.07327695
## AC044849.1 MIR9-3HG LINC01517 AC108479.1 U47924.2 SNHG10
## 0.07301511 0.07276381 0.07185499 0.07050353 0.07036831 0.06938289
## AL133370.1 AP003119.3 AC009093.4 LINC01476 TYMSOS LHX1-DT
## 0.06796496 0.06735050 0.06709791 0.06650317 0.06554956 0.06481574
## AC108134.2 LUCAT1 LINC02154 LINC02672 EGOT LNCAROD
## 0.06465599 0.06448604 0.06368642 0.06340953 0.06302292 0.06260716
## AC006076.1 CTC-338M12.4 NECTIN3-AS1 ARRDC1-AS1 DLGAP1-AS2 AC012615.1
## 0.06240301 0.06096335 0.05971948 0.05950587 0.05891872 0.05876340
## AC136297.1 MANCR AC107959.3 AC245041.2 LINC01484 AC026401.3
## 0.05869429 0.05811877 0.05803273 0.05767723 0.05755152 0.05716911
## AC021218.1 AL357033.4 AL365181.3 CD27-AS1 LINC00944 LURAP1L-AS1
## 0.05691210 0.05675830 0.05671149 0.05663044 0.05656194 0.05519646
## AP003119.2 AC092171.3 MIR4458HG AC083967.1 AC083837.1 AL365181.2
## 0.05515048 0.05492417 0.05489388 0.05428222 0.05418090 0.05392374
## LINC01629 FAM30A AC027288.3 ZNF295-AS1 AL590666.2 LINC01285
## 0.05382801 0.05376927 0.05357262 0.05311695 0.05291566 0.05266594
## AC092171.5 LINC02246 STAU2-AS1 AC026785.3
## 0.05228642 0.05183273 0.05165222 0.05160941
a1 <- dplyr::filter(df.PCA_lncRNA_500) %>%
ggplot(aes(x = PC1, y = PC2, col = treatment, shape = knockout)) +
geom_point(size = 7) +
geom_text_repel(aes(label = samples), size = 3) +
# scale_x_continuous(limits = c(-150, 150)) +
scale_color_manual(values = col_npg5) +
labs(shape = "sample", col = "treatment",
x = paste0("PC1","(",round(pca_lncRNA_results$sdev[1]^2/sum(pca_lncRNA_results$sdev^2)*100,1), "%)"),
y = paste0("PC2","(",round(pca_lncRNA_results$sdev[2]^2/sum(pca_lncRNA_results$sdev^2)*100,1), "%)")) +
ggtitle("lncRNA (top 500 highest variance)") +
# facet_grid( . ~ transformation) +
theme_bw() +
theme(axis.text = element_text(color = "black", size = 15), axis.title = element_text(color = "black", size = 20), plot.title = element_text(hjust=0.5))
a2 <- dplyr::filter(df.PCA_lncRNA_500) %>%
ggplot(aes(x = PC1, y = PC3, col = treatment, shape = knockout)) +
geom_point(size = 7) +
geom_text_repel(aes(label = samples), size = 3) +
# scale_x_continuous(limits = c(-150, 150)) +
scale_color_manual(values = col_npg5) +
labs(shape = "sample", col = "treatment", x = paste0("PC1","(",round(pca_lncRNA_results$sdev[1]^2/sum(pca_lncRNA_results$sdev^2)*100,1), "%)"),
y = paste0("PC3","(",round(pca_lncRNA_results$sdev[3]^2/sum(pca_lncRNA_results$sdev^2)*100,1), "%)")) +
ggtitle("lncRNA (top 500 highest variance)") +
# facet_grid( . ~ transformation) +
theme_bw() +
theme(axis.text = element_text(color = "black", size = 15), axis.title = element_text(color = "black", size = 20), plot.title = element_text(hjust=0.5))
a3 <- dplyr::filter(df.PCA_lncRNA_500) %>%
ggplot(aes(x = PC2, y = PC3, col = treatment, shape = knockout)) +
geom_point(size = 7) +
geom_text_repel(aes(label = samples), size = 3) +
# scale_x_continuous(limits = c(-150, 150)) +
scale_color_manual(values = col_npg5) +
labs(shape = "sample", col = "treatment",
x = paste0("PC2","(",round(pca_lncRNA_results$sdev[2]^2/sum(pca_lncRNA_results$sdev^2)*100,1), "%)"),
y = paste0("PC3","(",round(pca_lncRNA_results$sdev[3]^2/sum(pca_lncRNA_results$sdev^2)*100,1), "%)")) +
ggtitle("lncRNA (top 500 highest variance)") +
# facet_grid( . ~ transformation) +
theme_bw() +
theme(axis.text = element_text(color = "black", size = 15), axis.title = element_text(color = "black", size = 20), plot.title = element_text(hjust=0.5))
a1
a2
save.image("/researchers/krutika.ambani/Goel_lab_members/Keefe_Chan/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY/R_analysis/220714_intiial_QC_AA/1.QC_exploratory/data/QC_exploratory.RData")
load("/researchers/krutika.ambani/Goel_lab_members/Keefe_Chan/220714_sgRB1_sgTP53_sgControl_DMSO_DOX_LY/R_analysis/220714_intiial_QC_AA/1.QC_exploratory/data/QC_exploratory.RData")